#This to obtain:
#table_A7.tex

setwd("/Users/bgpopescu/")
#setwd("C:/Users/bogdanp/")

rm(list=ls())
library("broom")
library("dplyr")
library("grid")
library(tidyr)
library(sandwich)
library(lfe)
library(broom)
library(dplyr)
library(tidyr)
library(grid)
library(stargazer)
library(lmtest)
library(multiwayvcov)
library("ggplot2")
library("glue")
library("spdep")
library("spatialreg")
library('dismo')
library("stargazer")
library("rgdal")
library("readxl")
library("stringr")
library(spatialEco)
library(RColorBrewer)
library("lemon")
library(ggrepel)


########################################################################
#Function for returning results that include controls for SP. Autocorr.#
########################################################################

spatial_regression <- function(data_sample, y, x, unique_observation_id) {
  clean_x = clean_list(x, "quad")
  clean_x = clean_list(clean_x, "bfe")
  
  specification = as.formula(paste(glue("{y}~"), 
                                   paste(clean_x, collapse = "+")))
  bw<-subset(data_sample, select =c(x, y, unique_observation_id))
  #Removing NAs
  bw<-bw[!sf::st_is_empty(bw), ]
  bw<-na.omit(bw)
  #Step1: the function "poly2nb" builds a neighbours list based on
  list_nb=poly2nb(bw, row.names = dplyr::pull(bw, unique_observation_id))
  #Step2: Convert nb to listw type
  list_nb_w=tryCatch(nb2listw(list_nb, zero.policy=TRUE), error=function(err) NA)
  #Step3: Spatial Filtering
  reg1_res_sp<-tryCatch(SpatialFiltering(specification,
                                         data = bw,
                                         nb = list_nb,
                                         style="W",
                                         ExactEV = T,
                                         zero.policy = T,
                                         na.action=na.exclude),
                        error=function(err) NA)
  
  print("Done Spatial Filtering")
  #Step4: Include fitted results"
  bw_fitted<-tryCatch(cbind(bw, as.data.frame(fitted(reg1_res_sp)),
                            id = row.names(bw)), error=function(err) 
                            {print("NO NEIGHBORS"); data_sample})
  #Keeping track of indices
  #This will allow us to see exactly which observations were used in the regressions
  row.names(bw_fitted) <- bw_fitted$id
  bw_fitted<-subset(bw_fitted, select = -c(id))
  list_fitted_ei<-names(bw_fitted[, grepl("vec", names(bw_fitted))])
  list_fitted_ei<-list_fitted_ei[list_fitted_ei != "Shape"]
  all_vars<-append(x, list_fitted_ei)
  specification_with_eigenv = as.formula(paste(glue("{y}~"), 
                                               paste(all_vars, collapse = "+")))
  
  print(specification_with_eigenv)
  model1 <- tryCatch(lm(specification_with_eigenv, data = bw_fitted, na.action = na.exclude), error=function(err) NA)
  
  reg_moran<-moran.test(x=resid(model1),listw=list_nb_w, zero.policy = T)$statistic
  reg_moran_p<-moran.test(x=resid(model1),listw=list_nb_w, zero.policy = T)$p.value
  reg_moran_star<-paste(round(reg_moran, digits = 3), "",
                        ifelse(reg_moran_p<.01,"***",
                               ifelse(reg_moran_p<.05,"**",
                                      ifelse(reg_moran_p<.1,"*",
                                             ""))), "", "", sep = "")
  
  
  return(list(model1, reg_moran_star))
}


######################################
#Function for cleaning the list of FE#
#####################################


clean_list <- function(old_list, particle) {
  term <- paste(glue("^{particle}"))
  new_list = list() 
  for (i in old_list) {
    #print(i)
    temp_list = c()
    
    for (j in i){
      if (!grepl(term, j)){
        temp_list <- c(temp_list, j)
      }
    }
    l = length(new_list)
    new_list[[l+1]] <- temp_list
  }
  return(new_list)}


rerun_regression<-function(regression_name, string_dv, orig_data_for_clusters, cluster_name){
  #Process:
  #-takes already existing regression objects, associates them with an 
  #"official" label, calculates robust clustered SE and CI based on a cluster
  #that is in the original dataset
  #and puts the results in a tibble
  #-this will be used for ggplot later
  
  #Input:
  #-Regression model object after lm
  #-The label for the DV that you want displayed in ggplot
  #-original dataset that contains thhe names of the clusters
  
  #Output:
  #-A tibble with regression coefficients from the model that you ran with:
  #--robust SE
  #--robust CI
  
  #Recreate dataframe from the regression object
  remake_data<-regression_name$model
  #Scale the DV
  remake_data[1]<-scale(remake_data[1])
  #Take the string of the DV and IV
  dv<-regression_name$terms[[2]]
  ivs<-regression_name$terms[[3]]
  ivs_str<-paste(ivs)[2]
  dv_str<-paste(dv, "~", collapse = "")
  dv_ivs_str<-paste(dv_str, ivs_str, sep=" ")
  #Create specification and run model
  specification = as.formula(dv_ivs_str)
  model <- lm(specification, data = remake_data)
  no_obs<-nobs(model)
  no_obs<-paste("Obs: ", as.character(no_obs), sep = "")
  #Calculate robust SE
  model_se<-se_robust_cluster(model, orig_data_for_clusters, cluster_name)
  #Create tibble
  model_tidy<-tidy(model)
  model_tidy$no_obs<-no_obs
  model_se_tidy<-tidy(model_se)
  names(model_se_tidy)[1] <- "term"
  names(model_se_tidy)[2] <- "robust.se"
  model_tidy2<-left_join(model_tidy, model_se_tidy, by = c("term"="term"))
  
  #Calculate CI
  df_ci<-as.data.frame(ci_robust_cluster(model, orig_data_for_clusters, cluster_name))
  df_ci <- cbind(term = rownames(df_ci), df_ci)
  names(df_ci)[2] <- "lb_95"
  names(df_ci)[3] <- "ub_95"
  
  #Merge CI to the dataframe
  model_tidy3<-left_join(model_tidy2, df_ci, by = c("term"="term"))
  #Only keep the treat variable
  model_tidy4<-subset(model_tidy3, term=="treat")
  model_tidy4$dv_official<-string_dv
  return(model_tidy4)
}

se_robust_cluster <- function(x, data_sample_shape, name_cluster){
  data_model<-x$model
  data_model$in_regression<-1
  data_model<-subset(data_model, select=c(in_regression))
  #Create index
  data_model <- tibble::rowid_to_column(data_model, "index")
  data2<-tibble::rowid_to_column(data_sample_shape, "index")
  #Merge by rowname or index
  data_merged <- left_join(data_model, data2, by=c("index" = "index"))
  #Remove any missing
  data_subset<-subset(data_merged, !is.na(in_regression))
  dat_clusters<-data_subset[[name_cluster]]
  res<-coeftest(x, vcov = cluster.vcov(x, dat_clusters, force_posdef=T))[, 2]
  return(res)}

ci_robust_cluster <- function(x, data_sample_shape, name_cluster){
  data_model<-x$model
  data_model$in_regression<-1
  data_model<-subset(data_model, select=c(in_regression))
  #Create index
  data_model <- tibble::rowid_to_column(data_model, "index")
  data2<-tibble::rowid_to_column(data_sample_shape, "index")
  #Merge by rowname or index
  data_merged <- left_join(data_model, data2, by=c("index" = "index"))
  #Remove any missing
  data_subset<-subset(data_merged, !is.na(in_regression))
  dat_clusters<-data_subset[[name_cluster]]
  res_ci<-coefci(x, vcov = cluster.vcov(x, dat_clusters, force_posdef=T))
  return(res_ci)}



data <- st_read(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb",
                 layer="census_1857_points")
data<-subset(data, country=='Croatia')
data<-subset(data, select = c("original_order_id", "treat", "lon", "lat",
                                "krajna6_NEAR_FID", "krajna6_distance", "NEAR_FID", "zagreb_distance"))
data<-subset(data, treat ==1 | treat==0)

########################################
#Making polygons spatial autocorelation#
########################################

#Step1: Read treatment polygon
krajna6_treat_HABS <- st_read(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb",
                              layer="krajna6_treat_HABS_GCS")
sf::sf_use_s2(FALSE)
krajna6_treat_HABS<-subset(krajna6_treat_HABS, select = c(NAME_0))
krajna6_treat_HABS<-st_simplify(krajna6_treat_HABS, dTolerance = 0.005)
plot(krajna6_treat_HABS)

#Step2: Preparing geometry function
rename_geometry <- function(g, name){
  current = attr(g, "sf_column")
  names(g)[names(g)==current] = name
  st_geometry(g)=name
  g
}

#Step3: Applying the geometry function to the polygon
krajna6_treat_HABS = rename_geometry(krajna6_treat_HABS, "SHAPE")
krajna6_treat_KRAJ <- st_read(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb",
                              layer="krajna_polygon_d_GCS")
krajna6_treat_KRAJ<-subset(krajna6_treat_KRAJ, select = c(name_regiment))
krajna6_treat_KRAJ<-st_simplify(krajna6_treat_KRAJ, dTolerance = 0.005)

st_geometry(krajna6_treat_HABS) <- "SHAPE"
names(krajna6_treat_KRAJ)[names(krajna6_treat_KRAJ)=="name_regiment"]<-"NAME_0"
krajna6_treat_KRAJ = rename_geometry(krajna6_treat_KRAJ, "SHAPE")

#Step4: Merging the treatment and control polygons
HRV_adm_comp <- st_combine(rbind(krajna6_treat_HABS, krajna6_treat_KRAJ))
class(krajna6_treat_HABS)
plot(HRV_adm_comp)
HRV_adm_comp<-st_make_valid(HRV_adm_comp)

#Step4: Creating Voronoi Polygons function
st_voronoi_point <- function(points){
  ## points must be POINT geometry
  if(!all(st_geometry_type(points) == "POINT")){
    stop("Input not  POINT geometries")
  }
  g_res = st_combine(st_geometry(points)) # make multipoint
  v_res = st_voronoi(g_res)
  v_res2 = st_collection_extract(v_res)
  return(v_res2[unlist(st_intersects(g_res, points))])
}


#Step5: Using the Voronoi polygons functions
v = st_voronoi_point(data)
pv = st_set_geometry(data, v)
plot(pv)

#Step6: Intersections with the Croatia polygon
x <- st_intersection(st_cast(pv), st_union(HRV_adm_comp))
plot(x)

#Step7: Calculating specific variables
data2<-x
data2$treat
data2$dist1 <- as.numeric( with (data2,ifelse(data2$treat==1, 1, -1)))
data2$dist2 <-data2$dist1*(data2$krajna6_distance/1000)
data2$krajna6_NEAR_FID
data2$zagreb_distance<-data2$zagreb_distance/1000
data2$krajna6_NEAR_FID

data2$bfe1 <- ifelse(data2$krajna6_NEAR_FID == 1, 1,0)
data2$bfe2 <- ifelse(data2$krajna6_NEAR_FID == 2, 1,0)
data2$bfe3 <- ifelse(data2$krajna6_NEAR_FID == 3, 1,0)
data2$bfe4 <- ifelse(data2$krajna6_NEAR_FID == 4, 1,0)
data2$bfe5 <- ifelse(data2$krajna6_NEAR_FID == 5, 1,0)
data2$bfe6 <- ifelse(data2$krajna6_NEAR_FID == 6, 1,0)
data2$bfe7 <- ifelse(data2$krajna6_NEAR_FID == 7, 1,0)
data2$bfe8 <- ifelse(data2$krajna6_NEAR_FID == 8, 1,0)

data2$POINT_X<-data2$lon
data2$POINT_Y<-data2$lat


data2$quad1<-ifelse((data2$POINT_X > 18 & data2$POINT_X< 20) & 
                       (data2$POINT_Y<47  & data2$POINT_Y>44), 1, 0)


data2$quad2<-ifelse((data2$POINT_X > 16 & data2$POINT_X< 18) & 
                       (data2$POINT_Y<47  & data2$POINT_Y>44), 1, 0)


data2$quad3<-ifelse((data2$POINT_X > 14 & data2$POINT_X< 16) & 
                       (data2$POINT_Y<47  & data2$POINT_Y>44), 1, 0)



data_excel <- read_excel("./Dropbox/Legacies_Project/Analysis/data/census_1857.xlsx", sheet=1, col_names = TRUE, skip = 0)
data5<-left_join(data2, data_excel, by =c("original_order_id" = "original_order_id"))

specifications <- list(c('treat', 
                         'POINT_X', 'POINT_Y', 
                         'zagreb_distance', 
                         'bfe1', 'bfe2', 'bfe3', 'bfe4', 'bfe5', 'bfe6', 'bfe7',
                         'quad1', 'quad2', 'quad3'))



m_pct_priests_list<-spatial_regression(data5, "pct_priests", specifications[[1]], "place_german")
m_pct_priests<-m_pct_priests_list[[1]]
pct_priests_m<-m_pct_priests_list[[2]]


m_pct_civil_servants_list<-spatial_regression(data5, "pct_civil_servants", specifications[[1]], "place_german")
m_pct_civil_servants<-m_pct_civil_servants_list[[1]]
pct_civil_servants_m<-m_pct_civil_servants_list[[2]]

m_pct_military_list<-spatial_regression(data5, "pct_military", specifications[[1]], "place_german")
m_pct_military<-m_pct_military_list[[1]]
pct_military_m<-m_pct_military_list[[2]]
summary(m_pct_military)

m_pct_writers_artists_list<-spatial_regression(data5, "pct_writers_artists", specifications[[1]], "place_german")
m_pct_writers_artists<-m_pct_writers_artists_list[[1]]
pct_writers_artists_m<-m_pct_writers_artists_list[[2]]


m_pct_lawyers_list<-spatial_regression(data5, "pct_lawyers", specifications[[1]], "place_german")
m_pct_lawyers<-m_pct_lawyers_list[[1]]
pct_lawyers_m<-m_pct_lawyers_list[[2]]

m_pct_doctors_list<-spatial_regression(data5, "pct_doctors", specifications[[1]], "place_german")
m_pct_doctors<-m_pct_doctors_list[[1]]
pct_doctors_m<-m_pct_doctors_list[[2]]
summary(m_pct_doctors)

m_pct_manufacturer_list<-spatial_regression(data5, "pct_manufacturer", specifications[[1]], "place_german")
m_pct_manufacturer<-m_pct_manufacturer_list[[1]]
pct_manufacturer_m<-m_pct_manufacturer_list[[2]]

m_pct_merchants_list<-spatial_regression(data5, "pct_merchants", specifications[[1]], "place_german")
m_pct_merchants<-m_pct_merchants_list[[1]]
pct_merchants_m<-m_pct_merchants_list[[2]]

m_pct_fishermen_list<-spatial_regression(data5, "pct_fishermen", specifications[[1]], "place_german")
m_pct_fishermen<-m_pct_fishermen_list[[1]]
pct_fishermen_m<-m_pct_fishermen_list[[2]]

m_pct_agriculture_list<-spatial_regression(data5, "pct_agriculture", specifications[[1]], "place_german")
m_pct_agriculture<-m_pct_agriculture_list[[1]]
pct_agriculture_m<-m_pct_agriculture_list[[2]]

m_pct_worker_crafts_list<-spatial_regression(data5, "pct_worker_crafts", specifications[[1]], "place_german")
m_pct_worker_crafts<-m_pct_worker_crafts_list[[1]]
pct_worker_crafts_m<-m_pct_worker_crafts_list[[2]]

m_pct_worker_trade_list<-spatial_regression(data5, "pct_worker_trade", specifications[[1]], "place_german")
m_pct_worker_trade<-m_pct_worker_trade_list[[1]]
pct_worker_trade_m<-m_pct_worker_trade_list[[2]]

m_pct_day_laborers_list<-spatial_regression(data5, "pct_day_laborers", specifications[[1]], "place_german")
m_pct_day_laborers<-m_pct_day_laborers_list[[1]]
pct_day_laborers_m<-m_pct_day_laborers_list[[2]]

##############
#LIST MODELS#
##############

list_models<-list(m_pct_priests,
               m_pct_civil_servants,
               m_pct_military,
               m_pct_writers_artists,
               m_pct_lawyers,
               m_pct_doctors,
               m_pct_manufacturer,
               m_pct_merchants,
               m_pct_fishermen,
               m_pct_agriculture,
               m_pct_worker_crafts,
               m_pct_worker_trade,
               m_pct_day_laborers)


relevant_vars<-c("pct_priests",
                 "pct_civil_servants",
                 "pct_military",
                 "pct_writers_artists",
                 "pct_lawyers",
                 "pct_doctors",
                 "pct_manufacturer",
                 "pct_merchants",
                 "pct_fishermen",
                 "pct_agriculture",
                 "pct_worker_crafts",
                 "pct_worker_trade",
                 "pct_day_laborers")

##############
#MEANS AND SD#
##############
means<-list()
sds<-list()
for (i in relevant_vars){
  item_mean<-round(mean(data5[[i]], na.rm=T), 3)
  item_sd<-round(sd(data5[[i]], na.rm=T), 3)
  means[[length(means) + 1]] <- item_mean 
  sds[[length(sds) + 1]] <- item_sd 
}
means<-unlist(means)
sds<-unlist(sds)

##############
#LIST MORANi#
##############

list_moran_i<-c(pct_priests_m,
                pct_civil_servants_m,
                pct_military_m,
                pct_writers_artists_m,
                pct_lawyers_m,
                pct_doctors_m,
                pct_manufacturer_m,
                pct_merchants_m,
                pct_fishermen_m,
                pct_agriculture_m,
                pct_worker_crafts_m,
                pct_worker_trade_m,
                pct_day_laborers_m)



column_labels<-c("\\shortstack{Pct.\\\\ Priests}",
                 "\\shortstack{Pct.\\\\ Civil\\\\ Servants}", 
                 "\\shortstack{Pct.\\\\ Military}",
                 "\\shortstack{Pct.\\\\ Writers\\\\ Artists}",
                 "\\shortstack{Pct.\\\\ Lawyers}",              
                 "\\shortstack{Pct.\\\\ Doctors}",             
                 "\\shortstack{Pct.\\\\ Manufacturers}",        
                 "\\shortstack{Pct.\\\\ Merchants}",           
                 "\\shortstack{Pct.\\\\ Fishermen}",            
                 "\\shortstack{Pct.\\\\ Agriculture}",         
                 "\\shortstack{Pct.\\\\ Workers\\\\ Crafts}",  
                 "\\shortstack{Pct.\\\\ Workers\\\\ Trade}",  
                 "\\shortstack{Pct.\\\\ Day\\\\ Workers}")  
 
hist_mec2 = stargazer(list_models,
                      column.labels= column_labels,
                      keep = c("treat"),
                      se=list(se_robust_cluster(m_pct_priests, data5, "kreis_comitat_regiment_latin"),
                              se_robust_cluster(m_pct_civil_servants, data5, "kreis_comitat_regiment_latin"),
                              se_robust_cluster(m_pct_military, data5, "kreis_comitat_regiment_latin"),
                              se_robust_cluster(m_pct_writers_artists, data5, "kreis_comitat_regiment_latin"),
                              se_robust_cluster(m_pct_lawyers, data5, "kreis_comitat_regiment_latin"),
                              se_robust_cluster(m_pct_doctors, data5, "kreis_comitat_regiment_latin"),
                              se_robust_cluster(m_pct_manufacturer, data5, "kreis_comitat_regiment_latin"),
                              se_robust_cluster(m_pct_merchants, data5, "kreis_comitat_regiment_latin"),
                              se_robust_cluster(m_pct_fishermen, data5, "kreis_comitat_regiment_latin"),
                              se_robust_cluster(m_pct_agriculture, data5, "kreis_comitat_regiment_latin"),
                              se_robust_cluster(m_pct_worker_crafts, data5, "kreis_comitat_regiment_latin"),
                              se_robust_cluster(m_pct_worker_trade, data5, "kreis_comitat_regiment_latin"),
                              se_robust_cluster(m_pct_day_laborers, data5, "kreis_comitat_regiment_latin")),
                      covariate.labels=c("Military Territory"),
                      dep.var.caption = "Dependent variable:",
                      dep.var.labels.include = F,
                      omit.stat=c("LL","ser","f", "rsq"), no.space=TRUE, float=FALSE,
                      add.lines=list(c("Mean", means),
                                     c("SD", sds),
                                     c("Boundary FE", rep("Yes", 14)),
                                     c("Region FE", rep("Yes", 14)),
                                     c("Moran eigenvectors", rep("Yes", 14)),
                                     c("Moran's I Residual", list_moran_i)))
note_latex <- "\\multicolumn{14}{l} {\\parbox[t]{28cm}{ \\footnotesize \\textit{Notes:} 
Coefficients and County cluster robust standard errors in parantheses from OLS regressions. 
$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01.
The unit of analysis for the 1857 census can be city (stadt) or district (land bezirke, kreis, comitat). 
The data for the dependent variables is from Austrian Ministry of Internal Affairs (1859). See codebook for details.}}"
hist_mec2[grepl("Note", hist_mec2)] <- note_latex
cat(hist_mec2, file="./Dropbox/Legacies_Project/Paper/tables/table_A7.tex", sep="\n")

